// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#ifndef SHAVE_METADATA_H_INCLUDED
#define SHAVE_METADATA_H_INCLUDED

#include <cstdint>


// Sentinel constant with every bit set (~0u).
// NOTE(review): going by its name it marks an index field that refers to
// nothing; no use of it is visible in this header -- confirm against callers.
enum {
  md_invalid_index = ~0u,
};

// Known revisions of the metadata format.  Each value packs the major version
// into the upper 16 bits and the minor version into the lower 16 bits, which
// is the encoding documented on md_header_t::version.
enum md_version_t {
  md_version_1_0 = 0x00010000,  // version 1.0
  md_version_1_1 = 0x00010001,  // version 1.1
  md_version_1_2 = 0x00010002,  // version 1.2
  md_version_latest = md_version_1_2  // alias of the newest revision listed above
};

// Directory of the .metadata contents.  For every table it records how many
// elements the table holds and the absolute byte offset (measured from the
// start of .metadata) of the table's first element.
struct md_header_t {
  uint32_t version;             // major in bits 31..16 (mask 0xFFFF0000),
                                // minor in bits 15..0  (mask 0x0000FFFF);
                                // see md_version_t

  // md_kernel_descriptor_t array info
  uint32_t kernel_count;        // number of kernels in the .metadata
  uint32_t kernel_first;        // absolute byte offset to first
                                // md_kernel_descriptor_t from start of .metadata

  // md_kernel_argument_t array info
  uint32_t arg_count;           // number of arguments in the .metadata
  uint32_t arg_first;           // absolute byte offset to first
                                // md_kernel_argument_t from start of .metadata

  // md_kernel_sipp_info_t array info
  uint32_t sipp_info_count;     // number of sipp dma infos in .metadata
  uint32_t sipp_info_first;     // absolute byte offset to first
                                // md_kernel_sipp_info_t from start of .metadata

  // md_expr_t array info
  uint32_t expr_count;          // number of expressions in .metadata
  uint32_t expr_first;          // absolute byte offset to first
                                // md_expr_t from start of .metadata

  // md_expr_node_t array info
  uint32_t expr_node_count;     // number of expression nodes in .metadata
  uint32_t expr_node_first;     // absolute byte offset to first md_expr_node_t
                                // from start of .metadata

  // md_function_t array info (function table)
  uint32_t func_count;          // number of functions in the function table
  uint32_t func_first;          // absolute byte offset to the first md_function_t
                                // NOTE(review): presumably also measured from the
                                // start of .metadata like the other offsets -- confirm
};

// One entry of the function table located through md_header_t::func_first /
// md_header_t::func_count.
struct md_function_t {
  uint32_t load_address;        // runtime address of a kernel function
};

// Describes one compiled variant of a kernel.  md_kernel_descriptor_t embeds
// an array of kVariantCount of these.
// NOTE(review): the array is presumably indexed by md_kernel_variant_type_t,
// since kVariantCount is derived from md_VARIANT_COUNT -- confirm.
struct md_kernel_variant_t {
  uint32_t name;                // offset into the string table of the kernel name
  uint32_t factor;              // vector width / unroll factor
  uint32_t func;                // index into the kernel function table
                                // (table of md_function_t entries)
};

// Kinds of kernel variant.  The numbering is written out so the value of each
// enumerator can be read directly; md_VARIANT_COUNT stays implicit so that it
// always equals the number of kinds declared before it.
enum md_kernel_variant_type_t {
  md_variant_scalar              = 0,  // basic scalar kernel
  md_variant_vectorized          = 1,  // kernel has been vectorized
  md_variant_unrolled            = 2,  // kernel has been loop unrolled
  md_variant_sipp_dma            = 3,  // sipp dma kernel
  md_variant_sipp_dma_vectorized = 4,  // vectorized sipp dma kernel
  md_variant_dma_preload         = 5,  // kernel preload function
  md_variant_dma_postwrite       = 6,  // kernel postwrite function
  md_variant_dma_fallback        = 7,  // kernel fallback function

  md_VARIANT_COUNT                     // number of variant kinds; must remain last
};

// Number of kernel variant kinds as a plain int; used as the length of
// md_kernel_descriptor_t::variant.
constexpr int kVariantCount = static_cast<int>(md_VARIANT_COUNT);

// Bit flags that may be OR-ed together in md_kernel_descriptor_t::flags.
// Each flag occupies its own bit.
enum md_kernel_flags_t {
  md_kernel_flags_ddr_write         = 1u << 0,  // kernel writes to DDR memory
  md_kernel_flags_ddr_read          = 1u << 1,  // kernel reads from DDR memory
  md_kernel_flags_generated_prepost = 1u << 2,  // kernel has an autogenerated prepost
};

// Per-kernel record.  It ties a kernel to its slice of the argument table,
// its SIPP dma info, its name, its stack estimates and its list of variants.
struct md_kernel_descriptor_t {
  uint32_t flags;               // combination of md_kernel_flags_t

  uint32_t arg_count;           // number of arguments for this kernel
  uint32_t arg_index;           // index of first md_kernel_argument_t

  uint32_t sipp_dma_in_count;   // number of SIPP dma input arguments (or 0 if no SIPP dma)
  uint32_t sipp_dma_out_count;  // number of SIPP dma output arguments (or 0 if no SIPP dma)
  uint32_t sipp_info_index;     // index into the md_kernel_sipp_info_t list

  uint32_t name;                // metadata string table offset for kernel name

  uint32_t stack_size_wg;       // estimate of stack usage per work group (fixed)
  uint32_t stack_size_wi;       // estimate of stack usage per work item

  // kernel variant list: kVariantCount entries, i.e. one slot per
  // md_kernel_variant_type_t value below md_VARIANT_COUNT
  md_kernel_variant_t variant[kVariantCount];
};

// Address space a kernel argument lives in (see
// md_kernel_argument_t::addr_space).  Values are written out explicitly.
enum md_arg_addr_space_t {
  md_addr_space_private  = 0,   // private address space
  md_addr_space_global   = 1,   // global address space (ddr)
  md_addr_space_constant = 2,   // constant address space
  md_addr_space_local    = 3,   // local address space (cmx)

  md_addr_space_undef    = 4,   // none of the others
};

// Bit flags that may be OR-ed together in md_kernel_argument_t::flags.
// Each flag occupies its own bit.
enum md_arg_flags_t {
  md_arg_flags_dma_input         = 1u << 0,  // local argument is being read from
  md_arg_flags_dma_output        = 1u << 1,  // local argument is being written to
  md_arg_flags_dma_double_buffer = 1u << 2,  // local argument should be double buffered
  md_arg_flags_generated_prepost = 1u << 3,  // preload and post write are auto generated
};

// Describes a single kernel argument.  A kernel's arguments are the
// md_kernel_descriptor_t::arg_count entries starting at
// md_kernel_descriptor_t::arg_index.
// NOTE(review): addr_space is stored as an enum without a fixed underlying
// type, so its size is chosen by the compiler; if this struct must match a
// fixed binary layout, confirm the enum is 4 bytes on every toolchain in use.
struct md_kernel_argument_t {
  uint32_t flags;                  // bitfield of md_arg_flags_t
  uint32_t name;                   // argument name
                                   // NOTE(review): presumably a string table offset
                                   // like the other name fields -- confirm
  uint32_t array_size_expr;        // index of the md_expr_t that evaluates to the
                                   // total number of elements
  uint32_t size_elm;               // size in bytes of the underlying element
  md_arg_addr_space_t addr_space;  // the argument's address space
  uint32_t alignment;              // alignment required, in bytes
  uint32_t arg_pack_offset;        // offset into the argument pack
};

// Describes one SIPP dma transfer.  The first three fields are plain values;
// every field after them is an index of an md_expr_t that yields the value.
struct md_kernel_sipp_info_t {
  uint32_t num_dims;            // number of dimensions of the dma
  uint32_t span_x;              // NOTE(review): span_x / span_y are not documented
  uint32_t span_y;              // in this header -- meaning to be confirmed

  // below are all indexes to an 'md_expr_t'
  uint32_t elm_size;            // size in bytes of the element
  uint32_t stride_y;            // stride, in units of elm_size, along the y axis
  uint32_t stride_z;            // stride, in units of elm_size, along the z axis
  uint32_t base;                // address of the base of the buffer
  uint32_t size_x;              // size in elements for x dim
  uint32_t size_y;              // size in elements for y dim
  uint32_t size_z;              // size in elements for z dim
  uint32_t max_x;               // max work item index in x dim
  uint32_t max_y;               // max work item index in y dim
  uint32_t max_z;               // max work item index in z dim
};

// Kind of an md_expr_node_t: either a leaf value or an operator.  The
// numbering is written out so each enumerator's value can be read directly.
enum md_expr_node_type_t {
  // leaf values
  md_type_global_size = 0,      // global work size
  md_type_local_size  = 1,      // local work size
  md_type_param       = 2,      // kernel parameter
  md_type_immediate   = 3,      // uint32_t immediate value

  // multiplicative operators
  md_type_op_umul     = 4,      // unsigned multiply
  md_type_op_udiv     = 5,      // unsigned divide

  // additive operators
  md_type_op_add      = 6,      // add
  md_type_op_sub      = 7,      // subtract

  // min / max
  md_type_op_min      = 8,      // signed min
  md_type_op_max      = 9,      // signed max
  md_type_op_umin     = 10,     // unsigned min
  md_type_op_umax     = 11,     // unsigned max

  // bitwise operators
  md_type_op_and      = 12,     // bitwise and
  md_type_op_or       = 13,     // bitwise or
  md_type_op_xor      = 14,     // bitwise xor

  // shifts
  md_type_op_shl      = 15,     // left shift
  md_type_op_lshr     = 16,     // right shift

  // more operators as needed
  // ...
};

// A single node of an expression; md_expr_t refers to a run of these.
// NOTE(review): `type` is an enum without a fixed underlying type, so its
// size is chosen by the compiler; if this struct must match a fixed binary
// layout, confirm the enum is 4 bytes on every toolchain in use.
struct md_expr_node_t {
  md_expr_node_type_t type;     // type of this expression node
  uint32_t value;               // immediate or operand
};

// An expression, stored as a contiguous run of md_expr_node_t entries:
// node_count nodes beginning at index node_first.
// NOTE(review): the order in which the nodes are evaluated is not stated in
// this header -- confirm against the evaluator.
struct md_expr_t {
  uint32_t node_count;          // number of md_expr_node_t's that make up this
                                // expression
  uint32_t node_first;          // index of the first md_expr_node_t that
                                // is part of this expression
};

#endif  // SHAVE_METADATA_H_INCLUDED
